* Close any log left open by an earlier run (capture suppresses the error when
* none is open), then start this script's log, overwriting any previous copy.
capture log close
log using "${logpath}clean_patientlevel.log", replace

/*******************************************************************************
clean_patientlevel.do

This code takes the patient-month-level panel created by SAS and cleans it.
*******************************************************************************/

* Start from an empty session.
clear all

* Read only the columns used below, and only the rows that satisfy all of:
*   - pvntmcare1 and pvntmcare2 are not both zero
*   - year between 2003 and 2017 inclusive
*   - RXGROUP equal to the string "1"
*   - a positive, non-missing dialysis session count
*   - a non-missing patient id
use pvntmcare* year RXGROUP total_dialsess usrds_id ///
	NHWHITE BLACK HISP ASIAN ROTHER MALE ///
	ZIPCODE STATE COUNTY INC_AGE fac_FREE ///
	month provusrd newprovusrd inc_* months_with_esrd ///
	eoh_* fac_age newchain ///
	rides NErides NEdialysisrides death ambul_NPI* ///
	using "${rawdatapath}patient_panel_complete.dta" ///
	if !(pvntmcare1==0 & pvntmcare2==0) & inrange(year,2003,2017) & ///
	RXGROUP=="1" & ///
	total_dialsess>0 & total_dialsess<. & ///
	!missing(usrds_id), clear

* Lower-case the mixed/upper-case source names, and shorten the session count.
rename NHWHITE BLACK HISP ASIAN ROTHER MALE ///
	ZIPCODE STATE COUNTY INC_AGE fac_FREE, lower
rename total_dialsess dialsess

* Monthly time index built from calendar year and month.
gen t = ym(year, month)
format t %tm

order usrds_id t

* A patient-month that appears more than once is removed entirely
* (every copy is dropped, none is kept).
duplicates tag usrds_id t, generate(n)
drop if n > 0
drop n

* Declare the patient-by-month panel.
sort usrds_id t
xtset usrds_id t

*Filling in newprovusrd
* Within each provusrd, missing values of newprovusrd are filled from the
* neighbouring observation in time: first carrying earlier values forward,
* then carrying later values backward.

* Forward pass: rows in ascending time order within provusrd.
sort provusrd year t
gen fill_fwd = newprovusrd
by provusrd: replace fill_fwd = fill_fwd[_n-1] if _n > 1 & missing(fill_fwd)

* Backward pass: same carry, but with rows in descending time order.
gsort provusrd -year -t
gen fill_bwd = newprovusrd
by provusrd: replace fill_bwd = fill_bwd[_n-1] if _n > 1 & missing(fill_bwd)

* Report how often the two passes agree where both produced a value.
gen newnewagree = fill_fwd == fill_bwd if !missing(fill_fwd, fill_bwd)
tab newnewagree

* Prefer the forward-filled value; fall back to the backward-filled one.
gen newprovusrd2 = cond(missing(fill_fwd), fill_bwd, fill_fwd)
drop fill_fwd fill_bwd newnewagree newprovusrd
rename newprovusrd2 newprovusrd

* Rows that still have no value after both passes are discarded.
drop if missing(newprovusrd)

/* Attach zip-code-level demographics so that education, income, etc.
	can be used as controls. */
sort zipcode year
* Keep every panel row, matched or not; zip codes that appear only in the
* using file are not added.
merge m:1 zipcode using X:\data\Census\ZCTA\ACS_11.dta, keep(master match) nogenerate

* Bin median income; each bin is labelled by its lower cut point.
egen income_bin = cut(median_income), at(0 25000 33000 41000 1000000)

/*Patient risk adjusters*/
* Bin BMI and GFR at incidence; each bin is labelled by its lower cut point.
* Values outside the at() range are left missing by egen cut().
egen bmi_bin = cut(inc_bmi), at(0 18.5 30 40 100)
replace bmi_bin = round(bmi_bin,1) /*gets rid of decimal for 18.5 */
egen gfr_bin = cut(inc_gfr_epi), at(0 5 10 15 20 25 100)

* One indicator per bin; the indicator is missing when the bin is missing.
foreach level in 0 19 30 40 {
	gen bmi`level'=bmi_bin==`level' if !missing(bmi_bin)
}
foreach level in 0 5 10 15 20 25 {
	gen gfr`level'=gfr_bin==`level' if !missing(gfr_bin)
}

* Where hemoglobin is missing, substitute hematocrit divided by 3, then bin.
replace inc_hgb = inc_hcrit/3 if missing(inc_hgb)
egen inc_hgb_group = cut(inc_hgb),at(5 10 11 12 13 20)

* Current age = age at incidence plus years elapsed since the incidence year.
gen pat_age = year-inc_year+inc_age
gen pat_age2 = pat_age*pat_age
gen pat_age3 = pat_age2*pat_age
* NOTE(review): the last cut point is 70, so ages of 70 and above (and below
* 18) get a missing age_bin -- confirm that this is intended.
egen age_bin = cut(pat_age), at(18 44 65 70)

* Polynomial terms in dialysis tenure.
gen dial_tenure = months_with_esrd
gen dial_tenure2 = dial_tenure*dial_tenure
gen dial_tenure3 = dial_tenure2*dial_tenure

* Stata treats missing as larger than any number, so an unguarded
* "inc_album>3" would code patients with no albumin value as high albumin.
* Leave the indicator missing when the input is missing.
gen high_album = inc_album>3 if !missing(inc_album)

* Combined assistance flag: 1 if any of the three incidence flags equals 1,
* 0 if none does and at least one is observed, missing if all are missing.
gen inc_assist2 = (inc_assist==1 | inc_trans==1 | inc_ambul==1) ///
	if !(missing(inc_assist) & missing(inc_trans) & missing(inc_ambul))
drop inc_assist
rename inc_assist2 inc_assist

* Hospitalization indicators: 1 when the count is positive, 0 otherwise.
* A missing count is coded 0.
gen hosp_any    = eoh_count>0  & !missing(eoh_count)
gen hosp_fluid1 = eoh_fluid1>0 & !missing(eoh_fluid1)

/*Facility char*/
gen fac_age2 = fac_age^2

* Group id over the chain names as they are before the recode below.
egen cid=group(newchain)

* Collapse chain names to four codes: DVA, FMC, IND, and OTH for any other
* non-missing name.
replace newchain="DVA" if newchain=="DAVITA"
replace newchain="FMC" if newchain=="FRESENIUS"
replace newchain="OTH" if !missing(newchain) & !inlist(newchain,"DVA","FMC","IND")

* Assign the numeric codes explicitly instead of relying on encode's
* alphabetical numbering: if any one of the four categories were absent from
* the sample, encode would shift the later codes and the value labels below
* would be attached to the wrong chains. A missing chain name stays missing.
gen long chainid = .
replace chainid = 1 if newchain=="DVA"
replace chainid = 2 if newchain=="FMC"
replace chainid = 3 if newchain=="IND"
replace chainid = 4 if newchain=="OTH"
drop newchain
rename chainid newchain
label define chains 1 "Davita" 2 "Fresenius" 3 "Independent" 4 "Other Chain"
label values newchain chains

/*Touching up rides*/
* A missing ride count is treated as zero rides.
foreach ridevar of varlist rides NErides NEdialysisrides {
	replace `ridevar' = 0 if `ridevar' >= .
}

* Indicator for at least one NEdialysisrides ride in the month.
gen any_ride = (NEdialysisrides > 0)

* Rides that are not counted in NErides.
gen emerg_rides = rides - NErides

* Outcomes restricted to rider-months; missing for everyone else.
gen rider_death = cond(any_ride==1, death, .)
gen rider_hosp  = cond(any_ride==1, hosp_any, .)

* Total NEdialysisrides over all of a patient's observations.
bysort usrds_id: egen lifetime_rides = total(NEdialysisrides)

/*Defining prior authorization status*/
destring county, replace
destring state, replace

* Group states by numeric code: group 1, group 2, or 0 for all other states.
* Missing when state is missing.
gen priorauth_date = cond(inlist(state,34,42,45), 1, ///
	cond(inlist(state,10,11,24,37,51,54), 2, 0)) if !missing(state)

* prior_auth switches on from December 2014 for group 1 and from 2016 for
* group 2; it is 0 otherwise and missing when state is missing.
gen prior_auth = ///
	(priorauth_date==1 & (year>=2015 | (year==2014 & month==12))) | ///
	(priorauth_date==2 & year>=2016) ///
	if !missing(state)

/*Merging to get districts*/
* County-level crosswalk first; crosswalk rows with no patient are dropped.
merge m:1 county state using "${cleandatapath}DOJcounty_Xwalk.dta"
drop if _merge==2
drop _merge

* State-level crosswalk fills in the district where the county one gave none.
merge m:1 state using "${cleandatapath}DOJstate_Xwalk.dta"
drop if _merge==2
replace district=statedistrict if missing(district)
drop _merge statedistrict

* Give unmatched rows an explicit category so that encode assigns them a code.
replace district="MISSING" if missing(district)
encode district, gen(dist_code)

/*Merging to get litigation status*/
merge m:1 district using "${cleandatapath}DOJ_data_district.dta"
* Drop districts that appear only in the litigation file, as is done after
* every other merge in this script. Without this they are appended as rows
* with no patient id, no time index and no dist_code.
drop if _merge==2
drop _merge

/*Getting firm exits*/
* The exit file is keyed on NPI, so each of the two ambulance NPI slots is
* renamed to NPI in turn, merged, and renamed back. exit_date is converted
* from a daily date to a monthly date so that it can be compared with t.
forvalues k = 1/2 {
	rename ambul_NPI`k' NPI
	merge m:1 NPI using "${cleandatapath}exit_at_priorauth.dta", ///
		keep(master match) nogenerate
	replace exit_date = mofd(exit_date)
	rename (NPI exit_date) (ambul_NPI`k' exit_date`k')
}

* For rider-months only: does this month equal the exit month of either NPI?
forvalues k = 1/2 {
	gen ride_exit_now`k' = t==exit_date`k' if any_ride==1
}
gen ride_exit_now = max(ride_exit_now1, ride_exit_now2) if any_ride==1

* Keep only the variables written to the cleaned file. The text after each
* continuation marker is a comment describing that group.
keep usrds_id t year state dist_code newprovusrd /// identifiers, time, place
	male nhwhite black hisp asian rother /// sex and race/ethnicity
	pat_age pat_age2 pat_age3 /// age polynomial
	dial_tenure dial_tenure2 dial_tenure3 /// dialysis tenure polynomial
	bmi* gfr* inc_hgb_group high_album /// binned clinical measures
	inc_diabetes inc_hyper inc_cancer inc_drug inc_drinker inc_smoker ///
	inc_assist inc_copd inc_ashd inc_pvd inc_ischem inc_chd ///
	inc_noins inc_empcur income_bin /// insurance, employment, income
	fac_age fac_age2 fac_free newchain /// facility characteristics
	prior_auth priorauth_date civ_* crim_* /// prior authorization, litigation
	NEdialysisrides emerg_rides lifetime_rides any_ride ///
	ambul_NPI* ride_exit_now /// ambulance firms and exits
	death hosp_any hosp_fluid1 rider_death rider_hosp dialsess // outcomes

save "${cleandatapath}patientlevel.dta", replace

log close